#encoding=utf-8
import requests
from lxml import html
import pandas
def get_url(url='https://www.runoob.com/html/html-tutorial.html',
            output='save.xlsx'):
    """Scrape chapter links from the runoob.com HTML-tutorial sidebar into Excel.

    Reads every ``<a>`` entry in the page's ``#leftcolumn`` navigation,
    stopping just before the 'HTML5 教程' entry (the start of the next
    tutorial's section), and writes the collected name/url pairs to *output*.

    Parameters
    ----------
    url : str
        Page whose left-column links are scraped.
    output : str
        Path of the Excel file to write.

    Returns
    -------
    list[dict]
        The scraped records, each ``{'name': ..., 'url': ...}``.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    # NOTE(review): this cookie is a captured session value and will expire;
    # the request appears to work without it — confirm and consider removing.
    "cookie": "SERVERID=e861f758a7dc0c8cfeac76f2accad538|1613622939|1613622939; Hm_lvt_3eec0b7da6548cf07db3bc477ea905ee=1611833628,1613622779; Hm_lpvt_3eec0b7da6548cf07db3bc477ea905ee=1613622779; __gads=ID=2382ac78dec7d008-22e3ea160fc6008a:T=1613622939:RT=1613622939:S=ALNI_MbBFEckmD2_XSY7wWgppkzycAfF7w; _ga=GA1.2.1537915350.1613622779; _gid=GA1.2.452437427.1613622780",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
}
    # timeout: never hang forever on a dead server; raise_for_status: fail
    # fast instead of parsing an error page into an empty spreadsheet.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    # NOTE(review): res.text relies on requests' charset detection; the page
    # is presumably UTF-8 — verify, or set res.encoding explicitly if links
    # come back mojibake'd.
    tree = html.fromstring(res.text)
    names = tree.xpath('//*[@id="leftcolumn"]/a/text()')
    hrefs = tree.xpath('//*[@id="leftcolumn"]/a/@href')

    records = []
    # names/hrefs are parallel lists from the same <a> nodes; zip keeps them
    # aligned without the original's separate enumerate/index bookkeeping.
    for name, href in zip(names, hrefs):
        name = name.replace('\n', '').replace('\t', '').strip()
        if name == 'HTML5 教程':
            break
        records.append({'name': name, 'url': 'https://www.runoob.com' + href})

    # 'df', not 'pd' -- the original shadowed the conventional pandas alias.
    df = pandas.DataFrame(records)
    df.to_excel(output, index=False)
    return records



# Script entry point: scrape the tutorial sidebar and write save.xlsx.
if __name__ == '__main__':
    get_url()